SKEW factorrs

HSI SKEW with hsioptionsfutures.rds

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.0      ✔ stringr 1.4.0 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(dplyr)
library(lubridate)
## 
## Attaching package: 'lubridate'
## 
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

1.Read the data

setwd("/Users/stevenqin/Desktop/5008 project")
h <- readRDS("hsioptionsfutures.rds")
hsi <- read_csv("HSI.csv")
## Rows: 494 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (6): Open, High, Low, Close, Adj Close, Volume
## date (1): Date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
h
## # A tibble: 1,098,101 × 11
##    Date       Series     Market Market…¹ Commo…² Commo…³ Curre…⁴ Settl…⁵ Previ…⁶
##    <date>     <chr>      <chr>  <chr>    <chr>   <chr>   <chr>     <dbl>   <dbl>
##  1 2020-01-02 HSIF0      HSI    HANG SE… HSI     HANG S… HKD       28606   28270
##  2 2020-01-02 HSI20400A0 HSI    HANG SE… HSI     HANG S… HKD        8206    7870
##  3 2020-01-02 HSI20600A0 HSI    HANG SE… HSI     HANG S… HKD        8006    7670
##  4 2020-01-02 HSI20800A0 HSI    HANG SE… HSI     HANG S… HKD        7806    7470
##  5 2020-01-02 HSI21000A0 HSI    HANG SE… HSI     HANG S… HKD        7611    7271
##  6 2020-01-02 HSI21200A0 HSI    HANG SE… HSI     HANG S… HKD        7406    7071
##  7 2020-01-02 HSI21400A0 HSI    HANG SE… HSI     HANG S… HKD        7206    6871
##  8 2020-01-02 HSI21600A0 HSI    HANG SE… HSI     HANG S… HKD        7006    6671
##  9 2020-01-02 HSI21800A0 HSI    HANG SE… HSI     HANG S… HKD        6806    6471
## 10 2020-01-02 HSI22000A0 HSI    HANG SE… HSI     HANG S… HKD        6606    6271
## # … with 1,098,091 more rows, 2 more variables: Difference <dbl>,
## #   Impliedvolatility <dbl>, and abbreviated variable names ¹​MarketName,
## #   ²​Commodity, ³​CommodityName, ⁴​CurrencyCode, ⁵​SettlementPrice,
## #   ⁶​PreviousSettlementPrice
hsi
## # A tibble: 494 × 7
##    Date         Open   High    Low  Close `Adj Close`     Volume
##    <date>      <dbl>  <dbl>  <dbl>  <dbl>       <dbl>      <dbl>
##  1 2020-01-02 28249. 28544. 28246. 28544.      28544. 1262732800
##  2 2020-01-03 28828. 28883. 28428. 28452.      28452. 1797904800
##  3 2020-01-06 28326. 28368. 28054. 28226.      28226. 1793426600
##  4 2020-01-07 28353. 28473. 28264. 28322.      28322. 1302687200
##  5 2020-01-08 28000. 28199. 27858. 28088.      28088. 1709241600
##  6 2020-01-09 28368. 28561  28326. 28561       28561  1692786200
##  7 2020-01-10 28665. 28665. 28504. 28638.      28638. 1448401000
##  8 2020-01-13 28772. 28971. 28672. 28955.      28955. 1765055700
##  9 2020-01-14 29150. 29150. 28790. 28885.      28885. 1643504700
## 10 2020-01-15 28891. 28973. 28619. 28774.      28774. 1240120700
## # … with 484 more rows
hdate <- hsi$Date
dtotal <- data.frame(NULL)

for(i in 1:length(hdate)){
  dh <- h %>% filter(Date == hdate[i]) %>% mutate(ProductPrice = hsi$`Adj Close`[i])
  dtotal <- rbind(dtotal,dh)
}
dtotal
## # A tibble: 1,091,575 × 12
##    Date       Series     Market Market…¹ Commo…² Commo…³ Curre…⁴ Settl…⁵ Previ…⁶
##    <date>     <chr>      <chr>  <chr>    <chr>   <chr>   <chr>     <dbl>   <dbl>
##  1 2020-01-02 HSIF0      HSI    HANG SE… HSI     HANG S… HKD       28606   28270
##  2 2020-01-02 HSI20400A0 HSI    HANG SE… HSI     HANG S… HKD        8206    7870
##  3 2020-01-02 HSI20600A0 HSI    HANG SE… HSI     HANG S… HKD        8006    7670
##  4 2020-01-02 HSI20800A0 HSI    HANG SE… HSI     HANG S… HKD        7806    7470
##  5 2020-01-02 HSI21000A0 HSI    HANG SE… HSI     HANG S… HKD        7611    7271
##  6 2020-01-02 HSI21200A0 HSI    HANG SE… HSI     HANG S… HKD        7406    7071
##  7 2020-01-02 HSI21400A0 HSI    HANG SE… HSI     HANG S… HKD        7206    6871
##  8 2020-01-02 HSI21600A0 HSI    HANG SE… HSI     HANG S… HKD        7006    6671
##  9 2020-01-02 HSI21800A0 HSI    HANG SE… HSI     HANG S… HKD        6806    6471
## 10 2020-01-02 HSI22000A0 HSI    HANG SE… HSI     HANG S… HKD        6606    6271
## # … with 1,091,565 more rows, 3 more variables: Difference <dbl>,
## #   Impliedvolatility <dbl>, ProductPrice <dbl>, and abbreviated variable names
## #   ¹​MarketName, ²​Commodity, ³​CommodityName, ⁴​CurrencyCode, ⁵​SettlementPrice,
## #   ⁶​PreviousSettlementPrice
  1. Find the separate factors
place <- data.frame(seperate=which(nchar(dtotal$Series)==5|nchar(dtotal$Series)==8))
lastnumber <- data.frame(seperate=dim(dtotal)[1])
place <- rbind(place,lastnumber)
dim(dtotal)
## [1] 1091575      12
colnames(dtotal)
##  [1] "Date"                    "Series"                 
##  [3] "Market"                  "MarketName"             
##  [5] "Commodity"               "CommodityName"          
##  [7] "CurrencyCode"            "SettlementPrice"        
##  [9] "PreviousSettlementPrice" "Difference"             
## [11] "Impliedvolatility"       "ProductPrice"
unique(dtotal$Market)
## [1] "HSI" "WK1"
unique(dtotal$MarketName)
## [1] "HANG SENG FUTURES & OPTIONS"    "WEEKLY HANG SENG INDEX OPTIONS"
unique(dtotal$Commodity)
## [1] "HSI"
unique(dtotal$CurrencyCode)
## [1] "HKD"
  1. Extract the Strike, Maturity, timetoExpiration and filter the weekly and monthly options
weekly = dtotal%>% filter(MarketName=="WEEKLY HANG SENG INDEX OPTIONS")
weekly$Maturity <- ave(weekly$Date, weekly$Series, FUN = max)
weekly <- weekly %>% filter(nchar(Series)==13) %>% filter(MarketName=="WEEKLY HANG SENG INDEX OPTIONS") %>% mutate(timeToExpiration = as.numeric(Maturity - Date)) %>% mutate(strikePrice=as.numeric(substr(Series,4,8))) %>% select(Date,Series,MarketName,Impliedvolatility,ProductPrice,Maturity,timeToExpiration)
monthly = dtotal%>% filter(MarketName=="HANG SENG FUTURES & OPTIONS")
monthly$Maturity <- ave(monthly$Date, monthly$Series, FUN = max)
monthly <- monthly %>% filter(nchar(Series)==10) %>% filter(MarketName=="HANG SENG FUTURES & OPTIONS") %>% mutate(timeToExpiration = as.numeric(Maturity - Date)) %>% mutate(strikePrice=as.numeric(substr(Series,4,8))) %>% select(Date,Series,MarketName,Impliedvolatility,ProductPrice,Maturity,timeToExpiration)
  1. Inseet the putorcall columns according to the Series of weekly data.
putorcall=substr(weekly$Series,9,9)
weekly=cbind(weekly,putorcall)
  1. Split the data into dcall and dput with Strike, Maturity, timetoExpiration.
call_indicator = list("A","B","C","D","E","F","G","H","I","J","K","L")
put_indicator = list("M","N","O","P","Q","R","S","T","U","V","W","X")

dcall1 <- weekly %>% filter(putorcall %in% call_indicator)  %>% filter(Impliedvolatility != 0) %>% mutate(timeToExpiration = as.numeric(Maturity - Date)) %>% mutate(strikePrice=as.numeric(substr(Series,4,8))) %>% select(Date,Series,MarketName,Impliedvolatility,ProductPrice,timeToExpiration,strikePrice,putorcall)


dput1 <- weekly  %>% filter(putorcall %in% put_indicator) %>% filter(Impliedvolatility != 0)%>% mutate(strikePrice=as.numeric(substr(Series,4,8)))%>% select(Date,Series,MarketName,Impliedvolatility,ProductPrice,timeToExpiration,strikePrice,putorcall)
  1. OTM and ATM
otmp_1W <- dput1 %>% filter((strikePrice/ProductPrice>0.8)&(strikePrice/ProductPrice<0.95)) %>% filter(timeToExpiration>=0 & timeToExpiration<=7) %>% group_by(Date) %>% summarise(avg_otmp_1W=mean(Impliedvolatility))
otmp_1W
## # A tibble: 472 × 2
##    Date       avg_otmp_1W
##    <date>           <dbl>
##  1 2020-01-02        65.9
##  2 2020-01-03        25.5
##  3 2020-01-06        31.5
##  4 2020-01-07        37.1
##  5 2020-01-08        42.4
##  6 2020-01-09        67.0
##  7 2020-01-10        26.0
##  8 2020-01-13        35.4
##  9 2020-01-14        38.2
## 10 2020-01-15        48.0
## # … with 462 more rows
atmc_1W <- dcall1 %>% filter((strikePrice/ProductPrice>0.95)&(strikePrice/ProductPrice<1.05)) %>% filter(timeToExpiration>=0 & timeToExpiration<=7) %>% group_by(Date) %>% summarise(avg_atmc_1W =mean(Impliedvolatility))
atmc_1W
## # A tibble: 472 × 2
##    Date       avg_atmc_1W
##    <date>           <dbl>
##  1 2020-01-02        19.5
##  2 2020-01-03        16.4
##  3 2020-01-06        19.9
##  4 2020-01-07        17.7
##  5 2020-01-08        22.2
##  6 2020-01-09        19.4
##  7 2020-01-10        14.5
##  8 2020-01-13        17.8
##  9 2020-01-14        19.6
## 10 2020-01-15        18.8
## # … with 462 more rows
otmp_2W <- dput1 %>% filter((strikePrice/ProductPrice>0.8)&(strikePrice/ProductPrice<0.95)) %>% filter(timeToExpiration>=8&timeToExpiration<=14) %>% group_by(Date) %>% summarise(avg_otmp_2W = mean(Impliedvolatility))
otmp_2W
## # A tibble: 465 × 2
##    Date       avg_otmp_2W
##    <date>           <dbl>
##  1 2020-01-02        24.6
##  2 2020-01-03        15.7
##  3 2020-01-06        23.8
##  4 2020-01-07        24.1
##  5 2020-01-08        25.6
##  6 2020-01-09        25.4
##  7 2020-01-10        16.4
##  8 2020-01-13        21.2
##  9 2020-01-14        21.1
## 10 2020-01-15        21.7
## # … with 455 more rows
atmc_2W <- dcall1 %>% filter((strikePrice/ProductPrice>0.95)&(strikePrice/ProductPrice<1.05)) %>% filter(timeToExpiration>=8&timeToExpiration<=14) %>% group_by(Date) %>% summarise(avg_atmc_2W = mean(Impliedvolatility))
  1. Write the Weekly SKEW data
Combined_1W=merge(otmp_1W,atmc_1W,by=c("Date"),all.x=TRUE) %>% mutate("SKEW_1W" = avg_otmp_1W - avg_atmc_1W)
Combined_2W=merge(otmp_2W,atmc_2W,by=c("Date"),all.x=TRUE) %>% mutate("SKEW_2W" = avg_otmp_2W - avg_atmc_2W)

combined_W = merge(Combined_1W,Combined_2W,by=c("Date"),all.x=TRUE) 
combined_W = combined_W %>% select(Date,SKEW_1W,SKEW_2W)

write.csv(combined_W,"/Users/stevenqin/Desktop/5008 project/HSI_SKEW_1W+2W.csv", row.names = FALSE)
  1. Similarly, we do the same manipulations for the Monthly data as well.
putorcall=substr(monthly$Series,9,9)
monthly=cbind(monthly,putorcall)
call_indicator = list("A","B","C","D","E","F","G","H","I","J","K","L")
put_indicator = list("M","N","O","P","Q","R","S","T","U","V","W","X")

dcall2 <- monthly %>% filter(putorcall %in% call_indicator)  %>% filter(Impliedvolatility != 0) %>% mutate(timeToExpiration = as.numeric(Maturity - Date)) %>% mutate(strikePrice=as.numeric(substr(Series,4,8))) %>% select(Date,Series,MarketName,Impliedvolatility,ProductPrice,timeToExpiration,strikePrice,putorcall)


dput2 <- monthly  %>% filter(putorcall %in% put_indicator) %>% filter(Impliedvolatility != 0)%>% mutate(strikePrice=as.numeric(substr(Series,4,8)))%>% select(Date,Series,MarketName,Impliedvolatility,ProductPrice,timeToExpiration,strikePrice,putorcall)
otmp_1M <- dput2 %>% filter((strikePrice/ProductPrice>0.8)&(strikePrice/ProductPrice<0.95)) %>% filter(timeToExpiration>=15 & timeToExpiration<=45) %>% group_by(Date) %>% summarise(avg_otmp_1M=mean(Impliedvolatility))
otmp_1M
## # A tibble: 477 × 2
##    Date       avg_otmp_1M
##    <date>           <dbl>
##  1 2020-01-02        21.9
##  2 2020-01-03        23.3
##  3 2020-01-06        23.1
##  4 2020-01-07        23.3
##  5 2020-01-08        24.6
##  6 2020-01-09        22.3
##  7 2020-01-10        22.4
##  8 2020-01-13        22.1
##  9 2020-01-14        22.4
## 10 2020-01-15        22.3
## # … with 467 more rows
atmc_1M <- dcall2 %>% filter((strikePrice/ProductPrice>0.95)&(strikePrice/ProductPrice<1.05)) %>% filter(timeToExpiration>=15 & timeToExpiration<=45) %>% group_by(Date) %>% summarise(avg_atmc_1M =mean(Impliedvolatility))
atmc_1M
## # A tibble: 477 × 2
##    Date       avg_atmc_1M
##    <date>           <dbl>
##  1 2020-01-02        14.1
##  2 2020-01-03        14.8
##  3 2020-01-06        15.5
##  4 2020-01-07        14.2
##  5 2020-01-08        15.1
##  6 2020-01-09        13.6
##  7 2020-01-10        13.1
##  8 2020-01-13        14.2
##  9 2020-01-14        14.4
## 10 2020-01-15        14.1
## # … with 467 more rows
otmp_2M <- dput2 %>% filter((strikePrice/ProductPrice>0.8)&(strikePrice/ProductPrice<0.95)) %>% filter(timeToExpiration>=46&timeToExpiration<=90) %>% group_by(Date) %>% summarise(avg_otmp_2M = mean(Impliedvolatility))
otmp_2M
## # A tibble: 461 × 2
##    Date       avg_otmp_2M
##    <date>           <dbl>
##  1 2020-01-02        20.5
##  2 2020-01-03        21.5
##  3 2020-01-06        21.5
##  4 2020-01-07        21.4
##  5 2020-01-08        22.2
##  6 2020-01-09        20.6
##  7 2020-01-10        20.2
##  8 2020-01-13        20.3
##  9 2020-01-14        20.2
## 10 2020-01-15        20.1
## # … with 451 more rows
atmc_2M <- dcall2 %>% filter((strikePrice/ProductPrice>0.95)&(strikePrice/ProductPrice<1.05)) %>% filter(timeToExpiration>=46&timeToExpiration<=90) %>% group_by(Date) %>% summarise(avg_atmc_2M = mean(Impliedvolatility))
Combined_1M=merge(otmp_1M,atmc_1M,by=c("Date"),all.x=TRUE) %>% mutate("SKEW_1M" = avg_otmp_1M - avg_atmc_1M)
Combined_2M=merge(otmp_2M,atmc_2M,by=c("Date"),all.x=TRUE) %>% mutate("SKEW_2M" = avg_otmp_2M - avg_atmc_2M)

combined_M = merge(Combined_1M,Combined_2M,by=c("Date"),all.x=TRUE) 
combined_M = combined_M %>% select(Date,SKEW_1M,SKEW_2M)

write.csv(combined_M,"/Users/stevenqin/Desktop/5008 project/HSI_SKEW_1M+2M.csv", row.names = FALSE)

SKEWS of APPLE, TSLA and SPX

setwd("/Users/stevenqin/Desktop/5008 project/APPLE_OPTIONS_CSV")
fnames <- list.files()
csv <- lapply(fnames, read.csv)
df_aapl <- do.call(rbind, csv)

setwd("/Users/stevenqin/Desktop/5008 project/TESLA_OPTION_CSV")
fnames <- list.files()
csv <- lapply(fnames, read.csv)
df_tsla <- do.call(rbind, csv)

setwd("/Users/stevenqin/Desktop/5008 project/SPX500_OPTIONS_CSV")
fnames <- list.files()
csv <- lapply(fnames, read.csv)
df_spx <- do.call(rbind, csv)

Condidering the different valatility will influence the ATM and OTM range, we will change the range based on the sigma of log Return of each underlying

setwd("/Users/stevenqin/Desktop/5008 project")
TSLA = read_csv('TSLA.csv')
## Rows: 505 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (6): Open, High, Low, Close, Adj Close, Volume
## date (1): Date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
HSI = read_csv('HSI.csv')
## Rows: 494 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (6): Open, High, Low, Close, Adj Close, Volume
## date (1): Date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
AAPL = read_csv('Apple_Stock.csv')
## Rows: 504 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (6): Open, High, Low, Close, Adj Close, Volume
## date (1): Date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
SPX = read_csv('NEWSP500.csv')
## Rows: 505 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): Date, Change %
## dbl (10): Price, Open, High, Low, SMA5, SMA20, SMA50, RSI, MOMEN, ATR
## lgl  (1): Vol.
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
sigma_TSLA = sd(log(1+(TSLA$`Adj Close`[-1]-TSLA$`Adj Close`[-nrow(TSLA)])/TSLA$`Adj Close`[-nrow(TSLA)]))
sigma_HSI = sd(log(1+(HSI$`Adj Close`[-1]-HSI$`Adj Close`[-nrow(HSI)])/HSI$`Adj Close`[-nrow(HSI)]))
sigma_AAPL = sd(log(1+(AAPL$`Adj Close`[-1]-AAPL$`Adj Close`[-nrow(AAPL)])/AAPL$`Adj Close`[-nrow(AAPL)]))
sigma_SPX = sd(log(1+(SPX$`Price` [-1]-SPX$`Price`[-nrow(SPX)])/SPX$`Price`[-nrow(SPX)]))
print('sigma_TSLA is') 
## [1] "sigma_TSLA is"
cat(sigma_TSLA)
## 0.04675678
print('sigma_HSI is') 
## [1] "sigma_HSI is"
cat(sigma_HSI)
## 0.01375497
print('sigma_AAPL is') 
## [1] "sigma_AAPL is"
cat(sigma_AAPL)
## 0.02362969
print('sigma_SPX is') 
## [1] "sigma_SPX is"
cat(sigma_SPX)
## 0.01652697

Then similary, we can compute the SKEWs repondingly

#APPLE

df_aapl1 = df_aapl %>% select("X.QUOTE_DATE.",
                    "X.STRIKE." ,
                   "X.UNDERLYING_LAST." ,
                   "X.EXPIRE_DATE.",
                   "X.DTE." ,
                   "X.STRIKE_DISTANCE_PCT.",
                   "X.P_IV.",
                   "X.C_IV.",
                  ) %>% filter( !is.na(X.P_IV.)) %>% filter(between(X.STRIKE_DISTANCE_PCT., 0.1,0.2)) 

put_1m = df_aapl1 %>% filter(between(X.DTE., 15,45)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(put_avg_score_1m =mean(`X.P_IV.`,na.rm = TRUE))
put_1w = df_aapl1 %>% filter(between(X.DTE., 0,7)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(put_avg_score_1w =mean(`X.P_IV.`,na.rm = TRUE))
put_2w = df_aapl1 %>% filter(between(X.DTE., 8,14)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(put_avg_score_2w =mean(`X.P_IV.`,na.rm = TRUE))
put_2m = df_aapl1 %>% filter(between(X.DTE., 46,90)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(put_avg_score_2m =mean(`X.P_IV.`,na.rm = TRUE))


df_aapl1 = df_aapl %>% select("X.QUOTE_DATE.",
                    "X.STRIKE." ,
                   "X.UNDERLYING_LAST." ,
                   "X.EXPIRE_DATE.",
                   "X.DTE." ,
                   "X.STRIKE_DISTANCE_PCT.",
                   "X.P_IV.",
                   "X.C_IV.",
                  ) %>% filter( !is.na(X.C_IV.)) %>% filter(between(X.STRIKE_DISTANCE_PCT., 0,0.1)) 

call_1m = df_aapl1 %>% filter(between(X.DTE., 15,45)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(call_avg_score_1m =mean(`X.C_IV.`,na.rm = TRUE))
call_1w = df_aapl1 %>% filter(between(X.DTE., 0,7)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(call_avg_score_1w =mean(`X.C_IV.`,na.rm = TRUE))
call_2w = df_aapl1 %>% filter(between(X.DTE., 8,14)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(call_avg_score_2w =mean(`X.C_IV.`,na.rm = TRUE))
call_2m = df_aapl1 %>% filter(between(X.DTE., 46,90)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(call_avg_score_2m =mean(`X.C_IV.`,na.rm = TRUE))

combined_1m =merge(call_1m,put_1m,by=c("X.QUOTE_DATE."),all.x=TRUE) %>% mutate("SKEW_1M" = `put_avg_score_1m`- `call_avg_score_1m`)
combined_1w =merge(call_1w,put_1w,by=c("X.QUOTE_DATE."),all.x=TRUE) %>% mutate("SKEW_1W" = `put_avg_score_1w`- `call_avg_score_1w`)
combined_2w =merge(call_2w,put_2w,by=c("X.QUOTE_DATE."),all.x=TRUE) %>% mutate("SKEW_2W" = `put_avg_score_2w`- `call_avg_score_2w`)
combined_2m =merge(call_2m,put_2m,by=c("X.QUOTE_DATE."),all.x=TRUE) %>% mutate("SKEW_2M" = `put_avg_score_2m`- `call_avg_score_2m`)

combined_aapl <- combined_1m %>%
              left_join(combined_1w, by='X.QUOTE_DATE.') %>%
              left_join(combined_2w, by='X.QUOTE_DATE.')%>%
              left_join(combined_2m, by='X.QUOTE_DATE.')
#TSLA
df_tsla1 = df_tsla %>% select("X.QUOTE_DATE.",
                    "X.STRIKE." ,
                   "X.UNDERLYING_LAST." ,
                   "X.EXPIRE_DATE.",
                   "X.DTE." ,
                   "X.STRIKE_DISTANCE_PCT.",
                   "X.P_IV.",
                   "X.C_IV.",
                  ) %>% filter( !is.na(X.P_IV.)) %>% filter(between(X.STRIKE_DISTANCE_PCT., 0.2,0.4)) 

put_1m = df_tsla1 %>% filter(between(X.DTE., 15,45)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(put_avg_score_1m =mean(`X.P_IV.`,na.rm = TRUE))
put_1w = df_tsla1 %>% filter(between(X.DTE., 0,7)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(put_avg_score_1w =mean(`X.P_IV.`,na.rm = TRUE))
put_2w = df_tsla1 %>% filter(between(X.DTE., 8,14)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(put_avg_score_2w =mean(`X.P_IV.`,na.rm = TRUE))
put_2m = df_tsla1 %>% filter(between(X.DTE., 46,90)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(put_avg_score_2m =mean(`X.P_IV.`,na.rm = TRUE))


df_tsla1 = df_tsla %>% select("X.QUOTE_DATE.",
                    "X.STRIKE." ,
                   "X.UNDERLYING_LAST." ,
                   "X.EXPIRE_DATE.",
                   "X.DTE." ,
                   "X.STRIKE_DISTANCE_PCT.",
                   "X.P_IV.",
                   "X.C_IV.",
                  ) %>% filter( !is.na(X.C_IV.)) %>% filter(between(X.STRIKE_DISTANCE_PCT., 0,0.2)) 

call_1m = df_tsla1 %>% filter(between(X.DTE., 15,45)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(call_avg_score_1m =mean(`X.C_IV.`,na.rm = TRUE))
call_1w = df_tsla1 %>% filter(between(X.DTE., 0,7)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(call_avg_score_1w =mean(`X.C_IV.`,na.rm = TRUE))
call_2w = df_tsla1 %>% filter(between(X.DTE., 8,14)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(call_avg_score_2w =mean(`X.C_IV.`,na.rm = TRUE))
call_2m = df_tsla1 %>% filter(between(X.DTE., 46,90)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(call_avg_score_2m =mean(`X.C_IV.`,na.rm = TRUE))

combined_1m =merge(call_1m,put_1m,by=c("X.QUOTE_DATE."),all.x=TRUE) %>% mutate("SKEW_1M" = `put_avg_score_1m`- `call_avg_score_1m`)
combined_1w =merge(call_1w,put_1w,by=c("X.QUOTE_DATE."),all.x=TRUE) %>% mutate("SKEW_1W" = `put_avg_score_1w`- `call_avg_score_1w`)
combined_2w =merge(call_2w,put_2w,by=c("X.QUOTE_DATE."),all.x=TRUE) %>% mutate("SKEW_2W" = `put_avg_score_2w`- `call_avg_score_2w`)
combined_2m =merge(call_2m,put_2m,by=c("X.QUOTE_DATE."),all.x=TRUE) %>% mutate("SKEW_2M" = `put_avg_score_2m`- `call_avg_score_2m`)

combined_tsla <- combined_1m %>%
              left_join(combined_1w, by='X.QUOTE_DATE.') %>%
              left_join(combined_2w, by='X.QUOTE_DATE.')%>%
              left_join(combined_2m, by='X.QUOTE_DATE.')
#SPX
df_spx1 = df_spx %>% select("X.QUOTE_DATE.",
                    "X.STRIKE." ,
                   "X.UNDERLYING_LAST." ,
                   "X.EXPIRE_DATE.",
                   "X.DTE." ,
                   "X.STRIKE_DISTANCE_PCT.",
                   "X.P_IV.",
                   "X.C_IV.",
                  ) %>% filter( !is.na(X.P_IV.)) %>% filter(between(X.STRIKE_DISTANCE_PCT., 0.05,0.2)) 

put_1m = df_spx1 %>% filter(between(X.DTE., 15,45)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(put_avg_score_1m =mean(`X.P_IV.`,na.rm = TRUE))
put_1w = df_spx1 %>% filter(between(X.DTE., 0,7)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(put_avg_score_1w =mean(`X.P_IV.`,na.rm = TRUE))
put_2w = df_spx1 %>% filter(between(X.DTE., 8,14)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(put_avg_score_2w =mean(`X.P_IV.`,na.rm = TRUE))
put_2m = df_spx1 %>% filter(between(X.DTE., 46,90)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(put_avg_score_2m =mean(`X.P_IV.`,na.rm = TRUE))


df_spx1 = df_spx %>% select("X.QUOTE_DATE.",
                    "X.STRIKE." ,
                   "X.UNDERLYING_LAST." ,
                   "X.EXPIRE_DATE.",
                   "X.DTE." ,
                   "X.STRIKE_DISTANCE_PCT.",
                   "X.P_IV.",
                   "X.C_IV.",
                  ) %>% filter( !is.na(X.C_IV.)) %>% filter(between(X.STRIKE_DISTANCE_PCT., 0,0.05)) 

call_1m = df_spx1 %>% filter(between(X.DTE., 15,45)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(call_avg_score_1m =mean(`X.C_IV.`,na.rm = TRUE))
call_1w = df_spx1 %>% filter(between(X.DTE., 0,7)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(call_avg_score_1w =mean(`X.C_IV.`,na.rm = TRUE))
call_2w = df_spx1 %>% filter(between(X.DTE., 8,14)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(call_avg_score_2w =mean(`X.C_IV.`,na.rm = TRUE))
call_2m = df_spx1 %>% filter(between(X.DTE., 46,90)) %>% group_by(X.QUOTE_DATE.) %>% dplyr::summarise(call_avg_score_2m =mean(`X.C_IV.`,na.rm = TRUE))

combined_1m =merge(call_1m,put_1m,by=c("X.QUOTE_DATE."),all.x=TRUE) %>% mutate("SKEW_1M" = `put_avg_score_1m`- `call_avg_score_1m`)
combined_1w =merge(call_1w,put_1w,by=c("X.QUOTE_DATE."),all.x=TRUE) %>% mutate("SKEW_1W" = `put_avg_score_1w`- `call_avg_score_1w`)
combined_2w =merge(call_2w,put_2w,by=c("X.QUOTE_DATE."),all.x=TRUE) %>% mutate("SKEW_2W" = `put_avg_score_2w`- `call_avg_score_2w`)
combined_2m =merge(call_2m,put_2m,by=c("X.QUOTE_DATE."),all.x=TRUE) %>% mutate("SKEW_2M" = `put_avg_score_2m`- `call_avg_score_2m`)

combined_spx <- combined_1m %>%
              left_join(combined_1w, by='X.QUOTE_DATE.') %>%
              left_join(combined_2w, by='X.QUOTE_DATE.')%>%
              left_join(combined_2m, by='X.QUOTE_DATE.')

Turn the results into csv file

result_aapl = combined_aapl %>% select("X.QUOTE_DATE.",
                                 "SKEW_1W",
                                 "SKEW_2W",
                                 "SKEW_1M",
                                 "SKEW_2M",)
write.csv(result_aapl,'AAPL_SKEW.csv')

result_tsla = combined_tsla %>% select("X.QUOTE_DATE.",
                                 "SKEW_1W",
                                 "SKEW_2W",
                                 "SKEW_1M",
                                 "SKEW_2M",)
write.csv(result_tsla,'TSLA_SKEW.csv')

result_spx = combined_spx %>% select("X.QUOTE_DATE.",
                                 "SKEW_1W",
                                 "SKEW_2W",
                                 "SKEW_1M",
                                 "SKEW_2M",)
write.csv(result_spx,'SPX_SKEW.csv')

Some plots about the skew data

library(ggplot2)
plot(result_spx) 

#HSI
ggplot(data=combined_W, aes(x=Date, y=SKEW_1W, group=1)) +
  geom_line()+
  geom_point()

ggplot(data=combined_W, aes(x=Date, y=SKEW_2W, group=1)) +
  geom_line()+
  geom_point()
## Warning: Removed 5 rows containing missing values (`geom_line()`).
## Warning: Removed 29 rows containing missing values (`geom_point()`).

ggplot(data=combined_M, aes(x=Date, y=SKEW_1M, group=1)) +
  geom_line()+
  geom_point()

ggplot(data=combined_M, aes(x=Date, y=SKEW_2M, group=1)) +
  geom_line()+
  geom_point()
## Warning: Removed 23 rows containing missing values (`geom_line()`).
## Warning: Removed 23 rows containing missing values (`geom_point()`).

#AAPL
ggplot(data=result_aapl, aes(x=X.QUOTE_DATE., y=SKEW_1W, group=1)) +
  geom_line()+
  geom_point()

ggplot(data=result_aapl, aes(x=X.QUOTE_DATE., y=SKEW_2W, group=1)) +
  geom_line()+
  geom_point()
## Warning: Removed 10 rows containing missing values (`geom_point()`).

ggplot(data=result_aapl, aes(x=X.QUOTE_DATE., y=SKEW_1M, group=1)) +
  geom_line()+
  geom_point()

ggplot(data=result_aapl, aes(x=X.QUOTE_DATE., y=SKEW_2M, group=1)) +
  geom_line()+
  geom_point()
## Warning: Removed 2 rows containing missing values (`geom_point()`).

#TSLA
ggplot(data=result_tsla, aes(x=X.QUOTE_DATE., y=SKEW_1W, group=1)) +
  geom_line()+
  geom_point()

ggplot(data=result_tsla, aes(x=X.QUOTE_DATE., y=SKEW_2W, group=1)) +
  geom_line()+
  geom_point()
## Warning: Removed 10 rows containing missing values (`geom_point()`).

ggplot(data=result_tsla, aes(x=X.QUOTE_DATE., y=SKEW_1M, group=1)) +
  geom_line()+
  geom_point()

ggplot(data=result_tsla, aes(x=X.QUOTE_DATE., y=SKEW_2W, group=1)) +
  geom_line()+
  geom_point()
## Warning: Removed 10 rows containing missing values (`geom_point()`).

#SPX
ggplot(data=result_spx, aes(x=X.QUOTE_DATE., y=SKEW_1W, group=1)) +
  geom_line()+
  geom_point()

ggplot(data=result_spx, aes(x=X.QUOTE_DATE., y=SKEW_2W, group=1)) +
  geom_line()+
  geom_point()

ggplot(data=result_spx, aes(x=X.QUOTE_DATE., y=SKEW_1M, group=1)) +
  geom_line()+
  geom_point()

ggplot(data=result_spx, aes(x=X.QUOTE_DATE., y=SKEW_2M, group=1)) +
  geom_line()+
  geom_point()

#HSI
hist(combined_W$SKEW_1W, col='#5BBCBF', xlim=)
hist(combined_W$SKEW_2W, col='#93402E', add=TRUE)
hist(combined_M$SKEW_1M, col='#D5D76C', add=TRUE)
hist(combined_M$SKEW_2M, col='#A771E1', add=TRUE)

#AAPL
hist(result_aapl$SKEW_1W, col='#5BBCBF', xlim=)
hist(result_aapl$SKEW_2W, col='#93402E', add=TRUE)
hist(result_aapl$SKEW_1M, col='#D5D76C', add=TRUE)
hist(result_aapl$SKEW_2M, col='#A771E1', add=TRUE)

#TSLA
hist(result_tsla$SKEW_1W, col='#5BBCBF', xlim=)
hist(result_tsla$SKEW_2W, col='#93402E', add=TRUE)
hist(result_tsla$SKEW_1M, col='#D5D76C', add=TRUE)
hist(result_tsla$SKEW_2M, col='#A771E1', add=TRUE)

#TSLA
hist(result_spx$SKEW_1W, col='#5BBCBF', xlim=)
hist(result_spx$SKEW_2W, col='#93402E', add=TRUE)
hist(result_spx$SKEW_1M, col='#D5D76C', add=TRUE)
hist(result_spx$SKEW_2M, col='#A771E1', add=TRUE)